# Reading in Tree Data and making some calculations

# read in all trees from all plots in selected inventory years on timberland from FIA database

import requests # api
import pandas as pd # managing dataframes and spped
import numpy as np # data types and spped
import time
import datetime


now = datetime.datetime.now()
day = str(now.day)
month = now.strftime("%B")[0:3]
year = now.strftime("%y")


date = day+month+year+'.csv'
date


## INPUTS ##

# plot specific
plot_col = "CN,LAT,LON,INVYR,STATECD,COUNTYCD,PREV_PLT_CN,PLOT_STATUS_CD,DESIGNCD,ECOSUBCD"
#condition specific
cond_col = "CN,PLT_CN,CONDID,CONDPROP_UNADJ,MICRPROP_UNADJ,COND_STATUS_CD,RESERVCD,SICOND,SITECLCD,FORTYPCD,ASPECT,OWNGRPCD,STDORGCD,DSTRBCD1,DSTRBCD2,DSTRBCD3,FORINDCD,CARBON_DOWN_DEAD,CARBON_LITTER,CARBON_SOIL_ORG,CARBON_STANDING_DEAD,CARBON_UNDERSTORY_AG,CARBON_UNDERSTORY_BG"
# tree specific
tree_col = "PLT_CN,CONDID,DIA,VOLCFGRS,GROWCFGS,REMVCFAL,REMVCFGS,CARBON_BG,CARBON_AG,STATUSCD,TPA_UNADJ,TPAREMV_UNADJ,TPAGROW_UNADJ,SPCD,SPGRPCD"
URL = "https://apps.fs.usda.gov/Evalidator/rest/Evalidator/refTable?colList={cols}&tableName={tab}&whereStr=INVYR>{invyr}%20AND%20STATECD%20IN%20{state}&outputFormat=JSON"

#### READ DATA AND CALCULATE ####

DFs = {}
DFtemps = {}

col = [plot_col, cond_col, tree_col]
names = ['PLOT','COND','TREE']
states = [1]
mininvyr = "1999"


start = time.time()

for state in states:
    stateval = '({})'.format(state)
    
    for idx,i in enumerate(col):
        name = names[idx]
        if name in ['PLOT','COND']:
            URL_temp = URL.format(cols = i, tab = name, invyr = mininvyr, state = stateval)

            DFtemps[name] = pd.DataFrame.from_records(requests.get(URL_temp).json()['FIADB_SQL_Output']['record'])

        else:
            # set first equal to false so that
            # we know if we have started the tree DF
            first = False
            for year in list(range(int(mininvyr)+1,2020)):
                
                # replace > with = because we will query each year individuall
                # if we dont do this we get errors because we query too many records at once
                
                URL_temp = URL.replace('>','=').format(cols = i, tab = name, invyr = year, state = stateval)
                
                try:
                    DFtemp = pd.DataFrame.from_records(requests.get(URL_temp,stream = True).json()['FIADB_SQL_Output']['record'])
  
                # this error occurs when there are no records that match our query
                # i.e. no trees recorded in one inventory year...
                
                except TypeError:
                    
                    #continue sends to the top of this loop
                    
                    continue
                    
                if first == False:
                    DFtemps[name] = DFtemp
                    first = True
                
                else:
                    DFtemps[name] = pd.concat([DFtemps[name],DFtemp],sort=False)
                
                del DFtemp
                print("processed {} for state : {} and inventory year : {}".format(name,state,year))
        
        # if its the first state in the list save the info in DFs
        if state == states[0]:
            DFs[name] = DFtemps[name]
        # if its not the first than concatenate
        else:
            DFs[name] = pd.concat([DFs[name], DFtemps[name]],sort = False)
        
        print(name)
        time.sleep(2)
        
        
    
    print("""
    state -- {} -- is processed
    """.format(state))
    
    # this shouldnt make a difference but we delete it here
    # because there are alot of records with every tree DF
    del DFtemps['TREE']
    
    # wait 10 seconds
    
    time.sleep(10)

end = time.time()
hours = (end - start)/3600

print("the query took {} hours".format(hours))

lowerize = lambda x : x.lower() 

# make sure all columns are lower case
for i in DFs:
    temp = list(DFs[i])
    lowertemp = list(map(lowerize,temp))
    DFs[i].columns = lowertemp

# merge data frames
a = DFs['TREE'].merge(DFs['COND'], how='inner',left_on=['plt_cn','condid'], right_on=['plt_cn','condid'],suffixes=('_TREE', '_COND'))
a = a.merge(DFs['PLOT'],how='inner',left_on=['plt_cn'], right_on=['cn'],suffixes=('_TREECOND', '_PLT'))

# delete columns
deletelist = ['cn_PLT']
for i in deletelist:
    del a[i]

# convert to numberic
cols=[i for i in a.columns if i not in ['ecosubcd']]
for col in cols:
    a.loc[:,col]=pd.to_numeric(a.loc[:,col])

# filter out all plots on timberland
a = a[(a.plot_status_cd == 1) & (a.cond_status_cd == 1) & (a.reservcd == 0) & (a.siteclcd.isin([1,2,3,4,5,6]))]
a = a.rename(columns={'cn_TREECOND': "cn"})

# drop duplicates to find the condprop_unadj and micrprop_unadj for each condition 
# (this should be the same for every observation)
# in this case cn is the condition cn which I took from the condition FIA table
conddf = a.drop_duplicates(subset = ['plt_cn','cn'], keep = 'first')

# rename/copy
df = a.copy()
conddrop = conddf.copy()
del a

# select subset of entire df for simplicity
condunq = conddrop[["plt_cn","cn","condprop_unadj","micrprop_unadj",
           "owngrpcd","aspect","stdorgcd","dstrbcd1","dstrbcd2","dstrbcd3","fortypcd"]]
conddrop = conddrop[["plt_cn","cn","condprop_unadj","micrprop_unadj"]]

# now i groupby plt_cn and find the sum of the unique condprop_unadj and micrprop_unadj
# reset the index so or the DF will be indexed by plt_cn and we cant use it to join
plt = conddrop.groupby(['plt_cn'])[['condprop_unadj','micrprop_unadj']].sum().reset_index()

# the summed conprop_unadj and micrpop_unadj will give us the total proportion of the plot that is included in the analysis 
plt = plt.rename(index=str, columns={"condprop_unadj": "total_cond", "micrprop_unadj": "total_micr"})

# now we can rejoin this value to our entire tree dataframe in every tree row
# this is slow becaue the tree table has a few million records
conddf = conddf.merge(plt, how='left', on='plt_cn')
df = df.merge(plt, how='left', on='plt_cn')

del plt
del conddrop

intlist = ['dstrbcd1',
           'dstrbcd2',
           'dstrbcd3',
           'owngrpcd',
           'stdorgcd']

for j in intlist:
    condunq.loc[:,j] = [0 if (x is None) or (np.isnan(x)) else int(x) for x in condunq.loc[:,j]]
                         

# getting disturbance data

dis = {}
for i in condunq.index:
    
    plt = int(condunq.at[i,'plt_cn'])
    
    dis1 = int(condunq.at[i,'dstrbcd1'])
    dis2 = int(condunq.at[i,'dstrbcd2'])
    dis3 = int(condunq.at[i,'dstrbcd3'])
    
    dislist = [dis1,dis2,dis3]
    
    try:
        
        # this is testing to see if we have a value saved
        # could also do if plt not in dis.keys()
        # but i thought this would be faster
        
        test = dis[plt]
        del test
        
    except KeyError:
        
        # creating our key value pairs with all values set to 0
        # will update values below
        
        dis[plt] = {'ins_dis':0,'fire_wth':0}   
        
    finally:
        
        for d in dislist:
            
            if (d >= 10) and (d <= 22):
                
                dis[plt]['ins_dis'] = 1
     
            elif ((d >= 30) and (d <= 32))|((d >= 50) and (d <= 54)):
                
                dis[plt]['fire_wth'] = 1


# these are binary attributes that I will assign 1 if the largest condition for each plot

misc = {}
condunq = condunq.sort_values(by = 'condprop_unadj', axis = 0, ascending = False)
pltunq = condunq.drop_duplicates(subset=['plt_cn'], keep='first')

for i in pltunq.index:
    
    plt = int(condunq.at[i,'plt_cn'])
    own = int(condunq.at[i,'owngrpcd'])
    orig = int(condunq.at[i,'stdorgcd'])
    aspect = int(condunq.at[i,'aspect'])
    fortyp = int(condunq.at[i,'fortypcd'])
    
    misc[plt] = {'stat_fed':0,'art_regen':0,'aspect':aspect,'fortypcd':fortyp}
    
    if own < 35:
        misc[plt]['stat_fed'] = 1
    if orig > 0:
        misc[plt]['art_regen'] = 1

del condunq
del pltunq

## Creating shannon diversity index ##

# get number of trees per plot
spdict = df.groupby(['plt_cn'])['spcd'].size().reset_index(name = 'total').set_index('plt_cn').to_dict()

# get number of trees by species group
spcounts = df.groupby(['plt_cn','spcd']).size().reset_index(name='counts')

# merge in number of trees
spcounts.loc[:,'num_trees'] = [spdict['total'].get(cn) for cn in spcounts.plt_cn]

# proportion calculation
spcounts.loc[:,'shan'] = -(spcounts.counts/spcounts.num_trees)*np.log(spcounts.counts/spcounts.num_trees)

# sum on the plot cn and store in a dictionary 
spfinal = spcounts.groupby(['plt_cn'])['shan'].sum().reset_index(name = 'H')
spdict = spfinal.set_index('plt_cn').to_dict()

# delete intermediate data frames
del spfinal, spcounts

# create new variables for adjusted conditions and microplot proportions
# use this for weighted average of site index
df.loc[:,'adj_condprop'] = (df.condprop_unadj/df.total_cond)
df.loc[:,'adj_micrprop'] = (df.micrprop_unadj/df.total_micr)
conddf.loc[:,'adj_condprop'] = (conddf.condprop_unadj/conddf.total_cond)
conddf.loc[:,'adj_micrprop'] = (conddf.micrprop_unadj/conddf.total_micr)
df = df.rename(index=str, columns={"total_cond_x": "total_cond", "total_micr_x": "total_micr"})
# drop duplicates so that we only have one observation per condition
df_s = conddf.drop_duplicates(subset=["plt_cn","cn"], keep='first')
# this is the weighted average of site condtion based on proportion of plot
df_s.loc[:,'adj_sicond'] = df_s.adj_condprop*df_s.sicond
# add all of the condition values together to get our weighted average
df_s = df_s.groupby(['plt_cn'])[['adj_sicond']].sum().reset_index()
# create dictionary that stores weighted average and adjusted site index 
# use this dictionary to populate our plot data
sicond_dict = df_s.set_index('plt_cn').to_dict()
del df_s
# this removes a small number of tree observations that have "No Status" 
#(i.e. they were incorrectly measured in a previous inventory or something similar)
df = df.loc[df['statuscd'] > 0,:]  
df.loc[:,'TPA'] = pd.Series(np.zeros(df.shape[0])) ###add column TPA
df.loc[:,'DTPA'] = pd.Series(np.zeros(df.shape[0])) ###add column DTPA
df.loc[:,'RTPA'] = pd.Series(np.zeros(df.shape[0])) ###add column RTPA
# this calculates TPA DTPA and RTPA measures for each column
df.loc[:,'TPA'] = [1 if x == 1 else 0 for x in df['statuscd']]
df.loc[:,'DTPA'] = [1 if x == 2 else 0 for x in df['statuscd']]
df.loc[:,'RTPA'] = [1 if (x == 3)|(y > 0) else 0 for x,y in zip(df.loc[:,'statuscd'],pd.to_numeric(df.loc[:,'remvcfgs']))]

# # making an LDIA dataframe keeping old index # # #
LDIA = pd.DataFrame(df[(df.dia >= 5) & (df.condprop_unadj > 0)])
# if any of these attributes are notna we will keep the observations and count others as 0 when we sum
# we already filtered out trees where statuscd = 0, so we dont need to filter TPA RTPA and DTPA
LDIA = pd.DataFrame(LDIA[(pd.notna(LDIA.volcfgrs)) |
(pd.notna(LDIA.remvcfgs)) |
(pd.notna(LDIA.remvcfal)) |
(pd.notna(LDIA.carbon_bg)) |
(pd.notna(LDIA.carbon_ag))
])
# making aan SDIA dataframe keepeing old index # changed this from condprop_unadj > 0 to micrprop_unadj > 0 because we were dividing by zero sometimes
SDIA = pd.DataFrame(df[(df.dia < 5) & (df.micrprop_unadj > 0)])
SDIA = pd.DataFrame(SDIA[(pd.notna(SDIA.carbon_bg)) | 
(pd.notna(SDIA.carbon_ag))])

del df

# to numeric trys to convert everything to either a float or an integer
cols=[i for i in LDIA.columns if i not in ['ecosubcd']]
for col in cols:
    LDIA[col]=pd.to_numeric(LDIA[col])

    
cols=[i for i in SDIA.columns if i not in ['ecosubcd']]
for col in cols:
    SDIA[col]=pd.to_numeric(SDIA[col])  

# cubic foot per acre to cubic meter per hectare adjustment
cubicarea = 14.291
# per acre to per hectare
hect = 2.471
# lbs per acre to metric ton per hectare
kgsarea = 0.00112085

# if there are NAN values in these calculations they are treated as NAN
# we divide by LDIA.total_cond so that all trees on a plot are weighted equally, and everything is expanded to 1 acre
# since expansion factor is derived from the size of the plot, if our entire plot is not sampled (our plot area is smaller)
# we adjust the expansion factor

LDIA.loc[:,'L_volcfgrs'] = (LDIA.volcfgrs*LDIA.tpa_unadj/LDIA.total_cond)/cubicarea
LDIA.loc[:,'L_remvcfgs'] = (LDIA.remvcfgs*LDIA.tparemv_unadj/LDIA.total_cond)/cubicarea
LDIA.loc[:,'L_remvcfal'] = (LDIA.remvcfal*LDIA.tparemv_unadj/LDIA.total_cond)/cubicarea
LDIA.loc[:,'L_carbon_bg'] = (LDIA.carbon_bg*LDIA.tpa_unadj/LDIA.total_cond)*kgsarea
LDIA.loc[:,'L_carbon_ag'] = (LDIA.carbon_ag*LDIA.tpa_unadj/LDIA.total_cond)*kgsarea
LDIA.loc[:,'L_TPA'] = (LDIA.TPA*LDIA.tpa_unadj/LDIA.total_cond)/hect
LDIA.loc[:,'L_DTPA'] = LDIA.loc[:,'DTPA']
LDIA.loc[:,'L_RTPA'] = (LDIA.RTPA*LDIA.tparemv_unadj/LDIA.total_cond)/hect
##### LIVE TREE SOUND VOLUME ####
LDIA.loc[(LDIA['TPA'] == 1), 'L_live_volcfgrs'] = LDIA['L_volcfgrs']
##### DEAD TREE SOUND VOLUME ####
LDIA.loc[(LDIA['DTPA'] == 1), 'L_dead_volcfgrs'] = LDIA['volcfgrs']
##### LIVE TREE carbon ####
LDIA.loc[(LDIA['TPA'] == 1), 'L_live_carbon'] = LDIA['L_carbon_bg'] + LDIA['L_carbon_ag']
##### DEAD TREE carbon  ####
LDIA.loc[(LDIA['DTPA'] == 1), 'L_dead_carbon'] = LDIA['L_carbon_bg'] + LDIA['L_carbon_ag']
summed = ['CARBON_DOWN_DEAD',
'CARBON_LITTER',
'CARBON_UNDERSTORY_AG',
'CARBON_UNDERSTORY_BG']
summed = [i.lower() for i in summed]

## Condition Carbon ##

# us ton per acre to metric ton per hectare
ton_hect = 2.2417
# sum of all of the carbon columns
conddf.loc[:,'cond_carbon'] = ton_hect*(conddf[summed].sum(axis=1)/conddf.total_cond)
conddf.loc[:,'under_carbon'] = ton_hect*(conddf[['carbon_understory_ag','carbon_understory_bg']].sum(axis=1)/conddf.total_cond)
conddf.loc[:,'down_dead_carbon'] = ton_hect*(conddf.carbon_down_dead/conddf.total_cond)
# sum over plots
condcarbon = conddf.groupby(['plt_cn'])[['cond_carbon','under_carbon','down_dead_carbon']].sum().reset_index()
# get our columns of interest
condcarbon = condcarbon[['plt_cn','cond_carbon','under_carbon','down_dead_carbon']]
# put it in a dictionary
carbondict = condcarbon.set_index('plt_cn').to_dict()

# summing tree values for above attributes by the listed descriptive attributes below # I removed design_cd from this lists
# this operation counts missing values as 0, but shouldnt be a big issue because of our filters above
# creates new dataframe grouped_L

grouped_L = LDIA.groupby(['plt_cn', 'prev_plt_cn', 'lat', 'lon', 'invyr', 'statecd','countycd'])[['L_live_volcfgrs',
'L_dead_volcfgrs',                                                                                   
'L_volcfgrs',
'L_remvcfal',
'L_carbon_bg',
'L_carbon_ag',
'L_TPA',
'L_DTPA',
'L_RTPA',
'L_dead_carbon',
'L_live_carbon']].sum()

# this resets the index so that we can see all of the columns that were used to group

grouped_L = grouped_L.reset_index()

# calculating area adjusted values for each tree
SDIA.loc[:,'S_carbon_bg'] = (SDIA.carbon_bg*SDIA.tpa_unadj/SDIA.total_micr)*kgsarea
SDIA.loc[:,'S_carbon_ag'] = (SDIA.carbon_ag*SDIA.tpa_unadj/SDIA.total_micr)*kgsarea
SDIA.loc[:,'S_TPA'] = (SDIA.TPA*SDIA.tpa_unadj/SDIA.total_micr)/hect
#SDIA.loc[:,'S_DTPA'] = (SDIA.DTPA*SDIA.tpa_unadj/SDIA.total_micr)/hect
SDIA.loc[:,'S_DTPA'] = SDIA.loc[:,'DTPA']
SDIA.loc[:,'S_RTPA'] = (SDIA.RTPA*SDIA.tparemv_unadj/SDIA.total_micr)/hect

##### LIVE TREE carbon ####
SDIA.loc[(SDIA['TPA'] == 1), 'S_live_carbon'] = SDIA['S_carbon_bg'] + SDIA['S_carbon_ag']
##### DEAD TREE carbon  ####
SDIA.loc[(SDIA['DTPA'] == 1), 'S_dead_carbon'] = SDIA['S_carbon_bg'] + SDIA['S_carbon_ag']

# this operation counts missing values as 0
# creates new dataframe grouped_S
grouped_S = SDIA.groupby(['plt_cn', 'prev_plt_cn', 'lat', 'lon', 'invyr', 'statecd','countycd'])[['S_carbon_bg',
'S_carbon_ag',
'S_TPA',
'S_DTPA',
'S_RTPA',
'S_live_carbon',
'S_dead_carbon']].sum()

grouped_S = grouped_S.reset_index()

# the difference in number of rows means that some plots will not have SDIA or LDIA data (most likely SDIA)
# we will count this data as 0 when it is joined

del LDIA
del SDIA

all_plots = grouped_L.merge(grouped_S,
                            how = 'outer',
                            left_on = ['plt_cn','prev_plt_cn','lat','lon','invyr','statecd','countycd'],
                           right_on = ['plt_cn','prev_plt_cn','lat','lon','invyr','statecd','countycd'])

all_plots = all_plots.reset_index()

del grouped_L
del grouped_S



# add in the adjusted site index

all_plots.loc[:,'adj_sicond'] = [sicond_dict.get(int(cn)) for cn in all_plots.plt_cn] 


# This creates new variables for carbon,..., RTPA that are combined small and large diameter trees
# we use fill_value here so that the plots with missing either LDIA or SDIA observations 
# are not counted as 0 because of NA values
# the fill_value will not create any new 0 values because all plots should have at least one cabon measurment 
# (because of my filters above)

all_plots.loc[:,'S_carbon'] = all_plots.S_carbon_ag.add(all_plots.S_carbon_bg, fill_value=0)         
all_plots.loc[:,'L_carbon'] = all_plots.L_carbon_ag.add(all_plots.L_carbon_bg, fill_value=0)
all_plots.loc[:,'carbon'] = all_plots.L_carbon.add(all_plots.S_carbon, fill_value=0)

all_plots.loc[:,'deadtree_carbon'] = all_plots.L_dead_carbon.add(all_plots.S_dead_carbon, fill_value=0)
all_plots.loc[:,'livetree_carbon'] = all_plots.L_live_carbon.add(all_plots.S_live_carbon, fill_value=0)

all_plots.loc[:,'TPA'] = all_plots.L_TPA.add(all_plots.S_TPA, fill_value=0)                            
all_plots.loc[:,'DTPA'] = all_plots.L_DTPA.add(all_plots.S_DTPA, fill_value=0)                         
all_plots.loc[:,'RTPA'] = all_plots.L_RTPA.add(all_plots.S_RTPA, fill_value=0)     

a = all_plots.copy()

ecosubs = conddf[['ecosubcd','plt_cn']]
ecosubs = ecosubs.set_index('plt_cn')
ecodict = ecosubs.to_dict()

# adding condition level vars

j = 0
k = 0
z = 0
l = 0 
t = 0 
y = 0

for i in a.index:
    cn = a.at[i,'plt_cn']

    #########
    #########
    # adding some updated attributes
    #########
    #########    
    
    try:
        own = misc[int(cn)]['stat_fed']
        a.at[i,'stat_fed'] = own
        
        regen = misc[int(cn)]['art_regen']
        a.at[i,'art_regen'] = regen
        
        aspect = misc[int(cn)]['aspect']
        a.at[i,'aspect'] = aspect
        
        fortyp = misc[int(cn)]['fortypcd']
        a.at[i,'fortypcd'] = fortyp
        
    except KeyError:
        pass
    try:
        
        ins = dis[int(cn)]['ins_dis'] 
        a.at[i,'ins_dis'] = ins
        
        
        fire = dis[int(cn)]['fire_wth']
        a.at[i,'fire_wth'] = fire
        
    except KeyError:
        pass
    
    try:
        
        othercarbon = carbondict['cond_carbon'][int(cn)]
        a.at[i,'cond_carbon'] = othercarbon

        ucarbon = carbondict['under_carbon'][int(cn)]
        a.at[i,'under_carbon'] = ucarbon

        downdead = carbondict['down_dead_carbon'][int(cn)]
        a.at[i,'down_dead_carbon'] = downdead

        ecosub = ecodict['ecosubcd'][int(cn)]
        a.at[i,'ecosubcd'] = ecosub

        
    except KeyError:
        pass

    try:

        shannon = carbondict['H'][int(cn)]
        a.at[i,'H'] = shannon

    except KeyError:
        pass

# create binary for private an native american ownership
a.loc[:,'private'] = [1 if int(i) == 0 else 0 for i in list(a.stat_fed.fillna(0))]

# getting all of the carbon data in one attribute
a.loc[:,'carbon_total'] = a.carbon + a.cond_carbon

# now we need delete all of the dictionaries that we created...
del ecodict
del dis
del carbondict

a.to_csv("data.csv")